import jieba
import time
from utils.dict_saver_loader import *
# Count how often each jieba token occurs in a UTF-8 text corpus and save the
# resulting {token: count} mapping next to the corpus as "<corpus>_dict.txt".
start = time.time()

text_dir = '../../hard_drive/entropy/news2016zh_valid_smallft.txt'
# Output path: everything before the first '.txt' in the input path + '_dict.txt'.
dict_txt_dir = text_dir.split('.txt')[0] + '_dict.txt'

word_show_time_dict = {}  # token -> number of occurrences seen so far
line_processed_num = 0    # stays 0 when the corpus is empty

# The context manager guarantees the corpus file is closed even if
# segmentation raises part-way through (the handle used to be leaked).
with open(text_dir, 'r', encoding='utf-8') as f:
    # Stream the file line by line so the whole corpus is never held in memory.
    for line_processed_num, line in enumerate(f, 1):
        # cut_all=False is passed through to jieba unchanged. Every token it
        # yields is counted as-is; no filtering or normalisation is applied.
        for word in jieba.cut(line, cut_all=False):
            word_show_time_dict[word] = word_show_time_dict.get(word, 0) + 1
        # Progress report once every 1000 lines.
        if line_processed_num % 1000 == 0:
            print(line_processed_num // 1000, 'k lines processed')

# save_dict is presumably provided by the star import from
# utils.dict_saver_loader; it receives a plain dict and the output path.
save_dict(word_show_time_dict, dict_txt_dir)

end = time.time()
print('time usage:', end - start)